import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
data = pd.read_csv(r"C:\Users\Anusha\OneDrive\Desktop\pokemon.csv")
data
| abilities | against_bug | against_dark | against_dragon | against_electric | against_fairy | against_fight | against_fire | against_flying | against_ghost | ... | percentage_male | pokedex_number | sp_attack | sp_defense | speed | type1 | type2 | weight_kg | generation | is_legendary | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | ['Overgrow', 'Chlorophyll'] | 1.00 | 1.0 | 1.0 | 0.5 | 0.5 | 0.5 | 2.0 | 2.0 | 1.0 | ... | 88.1 | 1 | 65 | 65 | 45 | grass | poison | 6.9 | 1 | 0 |
| 1 | ['Overgrow', 'Chlorophyll'] | 1.00 | 1.0 | 1.0 | 0.5 | 0.5 | 0.5 | 2.0 | 2.0 | 1.0 | ... | 88.1 | 2 | 80 | 80 | 60 | grass | poison | 13.0 | 1 | 0 |
| 2 | ['Overgrow', 'Chlorophyll'] | 1.00 | 1.0 | 1.0 | 0.5 | 0.5 | 0.5 | 2.0 | 2.0 | 1.0 | ... | 88.1 | 3 | 122 | 120 | 80 | grass | poison | 100.0 | 1 | 0 |
| 3 | ['Blaze', 'Solar Power'] | 0.50 | 1.0 | 1.0 | 1.0 | 0.5 | 1.0 | 0.5 | 1.0 | 1.0 | ... | 88.1 | 4 | 60 | 50 | 65 | fire | NaN | 8.5 | 1 | 0 |
| 4 | ['Blaze', 'Solar Power'] | 0.50 | 1.0 | 1.0 | 1.0 | 0.5 | 1.0 | 0.5 | 1.0 | 1.0 | ... | 88.1 | 5 | 80 | 65 | 80 | fire | NaN | 19.0 | 1 | 0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 796 | ['Beast Boost'] | 0.25 | 1.0 | 0.5 | 2.0 | 0.5 | 1.0 | 2.0 | 0.5 | 1.0 | ... | NaN | 797 | 107 | 101 | 61 | steel | flying | 999.9 | 7 | 1 |
| 797 | ['Beast Boost'] | 1.00 | 1.0 | 0.5 | 0.5 | 0.5 | 2.0 | 4.0 | 1.0 | 1.0 | ... | NaN | 798 | 59 | 31 | 109 | grass | steel | 0.1 | 7 | 1 |
| 798 | ['Beast Boost'] | 2.00 | 0.5 | 2.0 | 0.5 | 4.0 | 2.0 | 0.5 | 1.0 | 0.5 | ... | NaN | 799 | 97 | 53 | 43 | dark | dragon | 888.0 | 7 | 1 |
| 799 | ['Prism Armor'] | 2.00 | 2.0 | 1.0 | 1.0 | 1.0 | 0.5 | 1.0 | 1.0 | 2.0 | ... | NaN | 800 | 127 | 89 | 79 | psychic | NaN | 230.0 | 7 | 1 |
| 800 | ['Soul-Heart'] | 0.25 | 0.5 | 0.0 | 1.0 | 0.5 | 1.0 | 2.0 | 0.5 | 1.0 | ... | NaN | 801 | 130 | 115 | 65 | steel | fairy | 80.5 | 7 | 1 |
801 rows × 41 columns
print(data.info())
print(data.isnull().sum())
<class 'pandas.core.frame.DataFrame'> RangeIndex: 801 entries, 0 to 800 Data columns (total 41 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 abilities 801 non-null object 1 against_bug 801 non-null float64 2 against_dark 801 non-null float64 3 against_dragon 801 non-null float64 4 against_electric 801 non-null float64 5 against_fairy 801 non-null float64 6 against_fight 801 non-null float64 7 against_fire 801 non-null float64 8 against_flying 801 non-null float64 9 against_ghost 801 non-null float64 10 against_grass 801 non-null float64 11 against_ground 801 non-null float64 12 against_ice 801 non-null float64 13 against_normal 801 non-null float64 14 against_poison 801 non-null float64 15 against_psychic 801 non-null float64 16 against_rock 801 non-null float64 17 against_steel 801 non-null float64 18 against_water 801 non-null float64 19 attack 801 non-null int64 20 base_egg_steps 801 non-null int64 21 base_happiness 801 non-null int64 22 base_total 801 non-null int64 23 capture_rate 801 non-null object 24 classfication 801 non-null object 25 defense 801 non-null int64 26 experience_growth 801 non-null int64 27 height_m 781 non-null float64 28 hp 801 non-null int64 29 japanese_name 801 non-null object 30 name 801 non-null object 31 percentage_male 703 non-null float64 32 pokedex_number 801 non-null int64 33 sp_attack 801 non-null int64 34 sp_defense 801 non-null int64 35 speed 801 non-null int64 36 type1 801 non-null object 37 type2 417 non-null object 38 weight_kg 781 non-null float64 39 generation 801 non-null int64 40 is_legendary 801 non-null int64 dtypes: float64(21), int64(13), object(7) memory usage: 256.7+ KB None abilities 0 against_bug 0 against_dark 0 against_dragon 0 against_electric 0 against_fairy 0 against_fight 0 against_fire 0 against_flying 0 against_ghost 0 against_grass 0 against_ground 0 against_ice 0 against_normal 0 against_poison 0 against_psychic 0 against_rock 0 against_steel 0 against_water 0 attack 0 base_egg_steps 0 base_happiness 0 base_total 0 capture_rate 0 classfication 0 defense 0 experience_growth 0 height_m 20 hp 0 japanese_name 0 name 0 percentage_male 98 pokedex_number 0 sp_attack 0 sp_defense 0 speed 0 type1 0 type2 384 weight_kg 20 generation 0 is_legendary 0 dtype: int64
print(data.describe())
against_bug against_dark against_dragon against_electric \
count 801.000000 801.000000 801.000000 801.000000
mean 0.996255 1.057116 0.968789 1.073970
std 0.597248 0.438142 0.353058 0.654962
min 0.250000 0.250000 0.000000 0.000000
25% 0.500000 1.000000 1.000000 0.500000
50% 1.000000 1.000000 1.000000 1.000000
75% 1.000000 1.000000 1.000000 1.000000
max 4.000000 4.000000 2.000000 4.000000
against_fairy against_fight against_fire against_flying \
count 801.000000 801.000000 801.000000 801.000000
mean 1.068976 1.065543 1.135456 1.192884
std 0.522167 0.717251 0.691853 0.604488
min 0.250000 0.000000 0.250000 0.250000
25% 1.000000 0.500000 0.500000 1.000000
50% 1.000000 1.000000 1.000000 1.000000
75% 1.000000 1.000000 2.000000 1.000000
max 4.000000 4.000000 4.000000 4.000000
against_ghost against_grass ... height_m hp \
count 801.000000 801.000000 ... 781.000000 801.000000
mean 0.985019 1.034020 ... 1.163892 68.958801
std 0.558256 0.788896 ... 1.080326 26.576015
min 0.000000 0.250000 ... 0.100000 1.000000
25% 1.000000 0.500000 ... 0.600000 50.000000
50% 1.000000 1.000000 ... 1.000000 65.000000
75% 1.000000 1.000000 ... 1.500000 80.000000
max 4.000000 4.000000 ... 14.500000 255.000000
percentage_male pokedex_number sp_attack sp_defense speed \
count 703.000000 801.000000 801.000000 801.000000 801.000000
mean 55.155761 401.000000 71.305868 70.911361 66.334582
std 20.261623 231.373075 32.353826 27.942501 28.907662
min 0.000000 1.000000 10.000000 20.000000 5.000000
25% 50.000000 201.000000 45.000000 50.000000 45.000000
50% 50.000000 401.000000 65.000000 66.000000 65.000000
75% 50.000000 601.000000 91.000000 90.000000 85.000000
max 100.000000 801.000000 194.000000 230.000000 180.000000
weight_kg generation is_legendary
count 781.000000 801.000000 801.000000
mean 61.378105 3.690387 0.087391
std 109.354766 1.930420 0.282583
min 0.100000 1.000000 0.000000
25% 9.000000 2.000000 0.000000
50% 27.300000 4.000000 0.000000
75% 64.800000 5.000000 0.000000
max 999.900000 7.000000 1.000000
[8 rows x 34 columns]
data['type2'] = data['type2'].fillna('NIL')
data['percentage_male'] = data['percentage_male'].fillna(0)
data['weight_kg'] = data['weight_kg'].fillna(data['weight_kg'].mean())
data['height_m'] = data['height_m'].fillna(data['height_m'].mean())
print(data.isnull().sum())
abilities 0 against_bug 0 against_dark 0 against_dragon 0 against_electric 0 against_fairy 0 against_fight 0 against_fire 0 against_flying 0 against_ghost 0 against_grass 0 against_ground 0 against_ice 0 against_normal 0 against_poison 0 against_psychic 0 against_rock 0 against_steel 0 against_water 0 attack 0 base_egg_steps 0 base_happiness 0 base_total 0 capture_rate 0 classfication 0 defense 0 experience_growth 0 height_m 0 hp 0 japanese_name 0 name 0 percentage_male 0 pokedex_number 0 sp_attack 0 sp_defense 0 speed 0 type1 0 type2 0 weight_kg 0 generation 0 is_legendary 0 dtype: int64
print(data.describe())
against_bug against_dark against_dragon against_electric \
count 801.000000 801.000000 801.000000 801.000000
mean 0.996255 1.057116 0.968789 1.073970
std 0.597248 0.438142 0.353058 0.654962
min 0.250000 0.250000 0.000000 0.000000
25% 0.500000 1.000000 1.000000 0.500000
50% 1.000000 1.000000 1.000000 1.000000
75% 1.000000 1.000000 1.000000 1.000000
max 4.000000 4.000000 2.000000 4.000000
against_fairy against_fight against_fire against_flying \
count 801.000000 801.000000 801.000000 801.000000
mean 1.068976 1.065543 1.135456 1.192884
std 0.522167 0.717251 0.691853 0.604488
min 0.250000 0.000000 0.250000 0.250000
25% 1.000000 0.500000 0.500000 1.000000
50% 1.000000 1.000000 1.000000 1.000000
75% 1.000000 1.000000 2.000000 1.000000
max 4.000000 4.000000 4.000000 4.000000
against_ghost against_grass ... height_m hp \
count 801.000000 801.000000 ... 801.000000 801.000000
mean 0.985019 1.034020 ... 1.163892 68.958801
std 0.558256 0.788896 ... 1.066737 26.576015
min 0.000000 0.250000 ... 0.100000 1.000000
25% 1.000000 0.500000 ... 0.600000 50.000000
50% 1.000000 1.000000 ... 1.000000 65.000000
75% 1.000000 1.000000 ... 1.500000 80.000000
max 4.000000 4.000000 ... 14.500000 255.000000
percentage_male pokedex_number sp_attack sp_defense speed \
count 801.000000 801.000000 801.000000 801.000000 801.000000
mean 48.407615 401.000000 71.305868 70.911361 66.334582
std 26.216655 231.373075 32.353826 27.942501 28.907662
min 0.000000 1.000000 10.000000 20.000000 5.000000
25% 50.000000 201.000000 45.000000 50.000000 45.000000
50% 50.000000 401.000000 65.000000 66.000000 65.000000
75% 50.000000 601.000000 91.000000 90.000000 85.000000
max 100.000000 801.000000 194.000000 230.000000 180.000000
weight_kg generation is_legendary
count 801.000000 801.000000 801.000000
mean 61.378105 3.690387 0.087391
std 107.979179 1.930420 0.282583
min 0.100000 1.000000 0.000000
25% 9.300000 2.000000 0.000000
50% 28.500000 4.000000 0.000000
75% 61.500000 5.000000 0.000000
max 999.900000 7.000000 1.000000
[8 rows x 34 columns]
corr = data.corr()
plt.figure(figsize=(28, 26))
sns.heatmap(corr, annot=True, cmap='coolwarm')
plt.show()
C:\Users\Anusha\AppData\Local\Temp\ipykernel_15256\2694556740.py:1: FutureWarning: The default value of numeric_only in DataFrame.corr is deprecated. In a future version, it will default to False. Select only valid columns or specify the value of numeric_only to silence this warning. corr = data.corr()
from scipy.stats import f_oneway
# Load your dataset into a DataFrame (assuming data is your DataFrame)
# data = pd.read_csv('path_to_pokemon_dataset.csv')
# 1. How are the base stats distributed across different Pokémon types?
plt.figure(figsize=(14, 10))
sns.boxplot(x='type1', y='hp', data=data)
plt.title('Distribution of HP across Pokémon Types')
plt.xlabel('Pokémon Type')
plt.ylabel('HP')
plt.xticks(rotation=90)
plt.show()
# 2. Are there any significant differences in base stats between generations?
gen_stats_comparison = {}
for stat in ['hp', 'attack', 'defense', 'sp_attack', 'sp_defense', 'speed']:
gen_groups = [group[stat] for name, group in data.groupby('generation')]
f_stat, p_value = f_oneway(*gen_groups)
gen_stats_comparison[stat] = {'F-statistic': f_stat, 'p-value': p_value}
print("Comparison of Base Stats between Generations:")
for stat, values in gen_stats_comparison.items():
print(f"{stat}: F-statistic = {values['F-statistic']}, p-value = {values['p-value']}")
# 3. What are the most common primary and secondary types among all Pokémon?
primary_types = data['type1'].value_counts().index[:5]
secondary_types = data['type2'].value_counts().index[:5]
print("Top 5 Most Common Primary Types:", primary_types)
print("Top 5 Most Common Secondary Types:", secondary_types)
# 4. How do the heights and weights of Pokémon vary across different types and generations?
plt.figure(figsize=(14, 6))
sns.scatterplot(x='height_m', y='weight_kg', hue='generation', data=data)
plt.title('Pokémon Heights vs Weights across Generations')
plt.xlabel('Height (m)')
plt.ylabel('Weight (kg)')
plt.legend(title='Generation')
plt.show()
# 5. Are there any patterns or trends in the capture rates of Pokémon?
plt.figure(figsize=(24, 8))
sns.histplot(data['capture_rate'], bins=20, kde=True)
plt.title('Distribution of Pokémon Capture Rates')
plt.xlabel('Capture Rate')
plt.ylabel('Count')
plt.show()
# 6. How do the base stats of legendary Pokémon compare to non-legendary Pokémon?
legendary_stats = data[data['is_legendary'] == True][['hp', 'attack', 'defense', 'sp_attack', 'sp_defense', 'speed']]
non_legendary_stats = data[data['is_legendary'] == False][['hp', 'attack', 'defense', 'sp_attack', 'sp_defense', 'speed']]
print("Legendary Pokémon Stats:")
print(legendary_stats.describe())
print("\nNon-Legendary Pokémon Stats:")
print(non_legendary_stats.describe())
# 7. What are the relationships between different base stats (e.g., HP vs. Defense)?
plt.figure(figsize=(8, 6))
sns.scatterplot(x='hp', y='defense', data=data)
plt.title('HP vs Defense')
plt.xlabel('HP')
plt.ylabel('Defense')
plt.show()
Comparison of Base Stats between Generations: hp: F-statistic = 1.8411756695267976, p-value = 0.08842313370826954 attack: F-statistic = 2.5088372059173945, p-value = 0.020652498695911767 defense: F-statistic = 0.8310245792815056, p-value = 0.545987632603192 sp_attack: F-statistic = 1.3965441109485413, p-value = 0.21314318986849756 sp_defense: F-statistic = 1.6370798250197902, p-value = 0.13392388768415722 speed: F-statistic = 1.4793080703163086, p-value = 0.1822617696076044 Top 5 Most Common Primary Types: Index(['water', 'normal', 'grass', 'bug', 'psychic'], dtype='object') Top 5 Most Common Secondary Types: Index(['NIL', 'flying', 'poison', 'ground', 'psychic'], dtype='object')
Legendary Pokémon Stats:
hp attack defense sp_attack sp_defense speed
count 70.000000 70.000000 70.000000 70.000000 70.000000 70.000000
mean 95.428571 109.357143 99.400000 113.757143 101.885714 95.428571
std 30.012351 31.562698 26.929538 34.403587 29.277554 24.671506
min 43.000000 29.000000 31.000000 29.000000 31.000000 37.000000
25% 80.000000 90.000000 89.250000 92.000000 90.000000 85.000000
50% 91.000000 104.000000 100.000000 114.000000 100.000000 98.000000
75% 103.750000 129.750000 115.000000 131.000000 120.000000 108.000000
max 223.000000 181.000000 200.000000 194.000000 200.000000 180.000000
Non-Legendary Pokémon Stats:
hp attack defense sp_attack sp_defense speed
count 731.000000 731.000000 731.000000 731.000000 731.000000 731.000000
mean 66.424077 74.841313 70.481532 67.240766 67.945280 63.548564
std 24.804290 30.576820 29.929838 29.084657 25.958128 27.736456
min 1.000000 5.000000 5.000000 10.000000 20.000000 5.000000
25% 50.000000 53.000000 50.000000 45.000000 50.000000 43.000000
50% 65.000000 70.000000 65.000000 62.000000 65.000000 60.000000
75% 78.000000 95.000000 85.000000 85.000000 85.000000 81.000000
max 255.000000 185.000000 230.000000 175.000000 230.000000 160.000000
# Distribution of base stats
sns.histplot(data['hp'], kde=True)
plt.show()
sns.histplot(data['attack'], kde=True)
plt.show()
sns.histplot(data['defense'], kde=True)
plt.show()
sns.histplot(data['sp_attack'], kde=True)
plt.show()
sns.histplot(data['sp_defense'], kde=True)
plt.show()
# Line plot for average stats by Generation
avg_stats = data.groupby('generation').mean().reset_index()
plt.figure(figsize=(10, 6))
for stat in ['hp', 'attack', 'defense', 'speed', 'sp_attack', 'sp_defense']:
plt.plot(avg_stats['generation'], avg_stats[stat], label=stat)
plt.legend()
plt.xlabel('Generation')
plt.ylabel('Average Value')
plt.title('Average Stats by Generation')
plt.show()
C:\Users\Anusha\AppData\Local\Temp\ipykernel_15256\1292573428.py:2: FutureWarning: The default value of numeric_only in DataFrameGroupBy.mean is deprecated. In a future version, numeric_only will default to False. Either specify numeric_only or select only columns which should be valid for the function.
avg_stats = data.groupby('generation').mean().reset_index()
import pandas.plotting as pd_plotting
# Scatter plot matrix for base stats
pd_plotting.scatter_matrix(data[['hp', 'attack', 'defense', 'speed', 'sp_attack', 'sp_defense']], figsize=(15, 15))
plt.show()
import seaborn as sns
import matplotlib.pyplot as plt
# Pair plot for base stats
sns.pairplot(data[['hp', 'attack', 'defense', 'speed', 'sp_attack', 'sp_defense']])
plt.show()
# Facet Grid for Attack vs. Defense across generations
g = sns.FacetGrid(data, col="generation", col_wrap=3)
g.map(sns.scatterplot, "attack", "defense")
plt.show()
legendary_stats = data[data['is_legendary'] == True][['type1', 'type2']].describe()
non_legendary_stats = data[data['is_legendary'] == False][['type1', 'type2']].describe()
print("Legendary Pokémon Stats:", legendary_stats)
print("Non-Legendary Pokémon Stats:", non_legendary_stats)
Legendary Pokémon Stats:/t type1 type2 count 70 70 unique 16 15 top psychic NIL freq 17 25 Non-Legendary Pokémon Stats:/t type1 type2 count 731 731 unique 18 19 top water NIL freq 108 359
generation_counts = data['generation'].value_counts().sort_index()
print("Number of Pokémon per Generation:", generation_counts)
sns.lineplot(x=generation_counts.index, y=generation_counts.values)
plt.title('Poke Count by Generation')
plt.xlabel('Generation')
plt.ylabel('Count')
plt.show()
Number of Pokémon per Generation: 1 151 2 100 3 135 4 107 5 156 6 72 7 80 Name: generation, dtype: int64
correlation = data[['attack', 'speed']].corr()
print("Correlation between Attack and Speed:", correlation)
# Scatter plot for Speed vs. Defense
sns.scatterplot(x='speed', y='attack', data = data)
plt.title('Speed vs. Attack')
plt.xlabel('Speed')
plt.ylabel('Attack')
plt.show()
Correlation between Attack and Speed: attack speed attack 1.000000 0.352703 speed 0.352703 1.000000
water_hp = data[data['type1'] == 'water']['hp'].mean()
other_types_hp = data[data['type1'] != 'water']['hp'].mean()
print("Water Type Average HP:", water_hp)
print("Other Types Average HP:", other_types_hp)
Water Type Average HP: 70.21929824561404 Other Types Average HP: 68.74963609898107
avg_speed_by_gen = data.groupby('generation')['speed'].mean()
print("Average Speed by Generation:\n", avg_speed_by_gen)
# Line plot for Average Speed by Generation
sns.lineplot(x=avg_speed_by_gen.index, y=avg_speed_by_gen.values)
plt.title('Average Speed by Generation')
plt.xlabel('Generation')
plt.ylabel('Average Speed')
plt.show()
Average Speed by Generation: generation 1 70.152318 2 61.610000 3 63.577778 4 70.074766 5 66.519231 6 66.652778 7 64.037500 Name: speed, dtype: float64
avg_attack_by_type = data.groupby('type1')['attack'].mean().sort_values(ascending=False)
print("Average Attack by Type:\n", avg_attack_by_type)
# Bar plot for Average Attack by Type
sns.barplot(x=avg_attack_by_type.index, y=avg_attack_by_type.values)
plt.title('Average Attack by Type')
plt.xlabel('Type')
plt.ylabel('Average Attack')
plt.xticks(rotation=90)
plt.show()
Average Attack by Type: type1 dragon 106.407407 fighting 99.178571 ground 94.812500 steel 93.083333 rock 90.666667 dark 87.793103 fire 81.500000 normal 75.161905 grass 73.769231 water 73.307018 ice 73.304348 ghost 72.740741 poison 72.656250 electric 70.820513 bug 70.125000 flying 66.666667 psychic 65.566038 fairy 62.111111 Name: attack, dtype: float64
correlation = data[['speed', 'defense']].corr()
print("Correlation between Speed and Defense:\n", correlation)
# Scatter plot for Speed vs. Defense
sns.scatterplot(x='speed', y='defense', data = data)
plt.title('Speed vs. Defense')
plt.xlabel('Speed')
plt.ylabel('Defense')
plt.show()
Correlation between Speed and Defense:
speed defense
speed 1.000000 0.007934
defense 0.007934 1.000000
avg_defense_by_type = data.groupby('type1')['defense'].mean().sort_values(ascending=False)
print("Average Defence by Type:\n", avg_defense_by_type)
# Bar plot for Average Attack by Type
sns.barplot(x=avg_defense_by_type.index, y=avg_defense_by_type.values)
plt.title('Average Defense by Type')
plt.xlabel('Type')
plt.ylabel('Average Defense')
plt.xticks(rotation=90)
plt.show()
Average Defence by Type: type1 steel 120.208333 rock 96.266667 dragon 86.259259 ground 83.906250 ghost 79.518519 water 73.482456 ice 71.913043 grass 70.871795 bug 70.847222 dark 70.517241 poison 70.031250 psychic 69.264151 fairy 68.166667 fire 67.788462 fighting 66.392857 flying 65.000000 electric 61.820513 normal 59.695238 Name: defense, dtype: float64
avg_hp_by_type = data.groupby('type1')['hp'].mean().sort_values(ascending=False)
print("Average HP by Type:\n", avg_hp_by_type)
# Bar plot for Average Attack by Type
sns.barplot(x=avg_hp_by_type.index, y=avg_hp_by_type.values)
plt.title('Average HP by Type')
plt.xlabel('Type')
plt.ylabel('Average HP')
plt.xticks(rotation=90)
plt.show()
Average HP by Type: type1 dragon 79.851852 normal 76.723810 fairy 73.944444 ground 73.187500 psychic 72.943396 dark 72.551724 ice 72.086957 fighting 71.428571 water 70.219298 fire 68.730769 flying 68.000000 steel 66.791667 rock 66.333333 poison 65.593750 grass 65.358974 ghost 63.370370 electric 60.512821 bug 56.722222 Name: hp, dtype: float64
# Average Speed by Type
avg_speed_by_type = data.groupby('type1')['speed'].mean().sort_values(ascending=False)
print("Average Speed by Type:\n", avg_speed_by_type)
# Bar plot for Average Speed by Type
sns.barplot(x=avg_speed_by_type.index, y=avg_speed_by_type.values)
plt.title('Average Speed by Type')
plt.xlabel('Type')
plt.ylabel('Average Speed')
plt.xticks(rotation=90)
plt.show()
Average Speed by Type: type1 flying 99.666667 electric 85.410256 dragon 76.111111 dark 75.310345 psychic 75.150943 fire 73.346154 normal 69.533333 fighting 64.285714 poison 64.187500 water 63.921053 bug 63.569444 ice 62.739130 ground 59.968750 grass 59.025641 ghost 58.333333 rock 57.422222 steel 56.583333 fairy 53.666667 Name: speed, dtype: float64
# Average Special Attack by Type
avg_sp_attack_by_type = data.groupby('type1')['sp_attack'].mean().sort_values(ascending=False)
print("Average Special Attack by Type:\n", avg_sp_attack_by_type)
# Bar plot for Average Special Attack by Type
sns.barplot(x=avg_sp_attack_by_type.index, y=avg_sp_attack_by_type.values)
plt.title('Average Special Attack by Type')
plt.xlabel('Type')
plt.ylabel('Average Special Attack')
plt.xticks(rotation=90)
plt.show()
Average Special Attack by Type: type1 psychic 92.603774 dragon 89.592593 fire 87.730769 electric 87.538462 flying 84.000000 ghost 82.444444 fairy 81.500000 ice 77.434783 dark 74.517241 grass 74.320513 water 74.061404 steel 72.708333 rock 63.200000 poison 61.562500 normal 56.980952 bug 56.652778 ground 51.937500 fighting 50.107143 Name: sp_attack, dtype: float64
# Average Special Defense by Type
avg_sp_defense_by_type = data.groupby('type1')['sp_defense'].mean().sort_values(ascending=False)
print("Average Special Defense by Type:\n", avg_sp_defense_by_type)
# Bar plot for Average Special Defense by Type
sns.barplot(x=avg_sp_defense_by_type.index, y=avg_sp_defense_by_type.values)
plt.title('Average Special Defense by Type')
plt.xlabel('Type')
plt.ylabel('Average Special Defense')
plt.xticks(rotation=90)
plt.show()
Average Special Defense by Type: type1 fairy 87.777778 psychic 85.735849 dragon 84.555556 steel 82.208333 ghost 78.296296 ice 76.130435 rock 73.377778 water 71.798246 fire 71.538462 electric 70.051282 flying 70.000000 grass 69.230769 dark 69.068966 poison 65.531250 fighting 63.428571 normal 63.200000 bug 62.513889 ground 62.281250 Name: sp_defense, dtype: float64
# Define a list of base stats to compare with base happiness
base_stats = ['capture_rate', 'hp', 'attack', 'defense', 'sp_attack', 'sp_defense', 'speed']
# Iterate over the base stats and create correlation and scatter plots
for stat in base_stats:
# Calculate correlation
correlation = data[['base_happiness', stat]].corr()
print(f"Correlation between Base Happiness and {stat.capitalize()}:\n", correlation)
# Scatter plot
sns.scatterplot(x='base_happiness', y=stat, data=data)
plt.title(f'Base Happiness vs. {stat.capitalize()}')
plt.xlabel('Base Happiness')
plt.ylabel(stat.capitalize())
plt.show()
C:\Users\Anusha\AppData\Local\Temp\ipykernel_15256\870604065.py:7: FutureWarning: The default value of numeric_only in DataFrame.corr is deprecated. In a future version, it will default to False. Select only valid columns or specify the value of numeric_only to silence this warning. correlation = data[['base_happiness', stat]].corr()
Correlation between Base Happiness and Capture_rate:
base_happiness
base_happiness 1.0
Correlation between Base Happiness and Hp:
base_happiness hp
base_happiness 1.000000 -0.108217
hp -0.108217 1.000000
Correlation between Base Happiness and Attack:
base_happiness attack
base_happiness 1.000000 -0.251811
attack -0.251811 1.000000
Correlation between Base Happiness and Defense:
base_happiness defense
base_happiness 1.000000 -0.191503
defense -0.191503 1.000000
Correlation between Base Happiness and Sp_attack:
base_happiness sp_attack
base_happiness 1.000000 -0.228924
sp_attack -0.228924 1.000000
Correlation between Base Happiness and Sp_defense:
base_happiness sp_defense
base_happiness 1.000000 -0.149719
sp_defense -0.149719 1.000000
Correlation between Base Happiness and Speed:
base_happiness speed
base_happiness 1.000000 -0.148753
speed -0.148753 1.000000
# Calculate the average base happiness for Legendary and Non-Legendary Pokémon
avg_happiness_legendary = data[data['is_legendary'] == True]['base_happiness'].mean()
avg_happiness_non_legendary = data[data['is_legendary'] == False]['base_happiness'].mean()
print("Average Base Happiness for Legendary Pokémon:", avg_happiness_legendary)
print("Average Base Happiness for Non-Legendary Pokémon:", avg_happiness_non_legendary)
# Bar plot for Average Base Happiness
sns.barplot(x=['Legendary', 'Non-Legendary'], y=[avg_happiness_legendary, avg_happiness_non_legendary])
plt.title('Average Base Happiness: Legendary vs Non-Legendary Pokémon')
plt.xlabel('Category')
plt.ylabel('Average Base Happiness')
plt.show()
# Perform a t-test to check for statistical significance
legendary_happiness = data[data['is_legendary'] == True]['base_happiness']
non_legendary_happiness = data[data['is_legendary'] == False]['base_happiness']
t_stat, p_value = ttest_ind(legendary_happiness, non_legendary_happiness)
print("T-statistic:", t_stat)
print("P-value:", p_value)
if p_value < 0.05:
print("There is a significant difference in base happiness between Legendary and Non-Legendary Pokémon.")
else:
print("There is no significant difference in base happiness between Legendary and Non-Legendary Pokémon.")
Average Base Happiness for Legendary Pokémon: 39.214285714285715 Average Base Happiness for Non-Legendary Pokémon: 67.86593707250341
T-statistic: -12.82241778643544 P-value: 2.3225400306230407e-34 There is a significant difference in base happiness between Legendary and Non-Legendary Pokémon.
from scipy.stats import f_oneway
avg_happiness_by_type = data.groupby('type1')['base_happiness'].mean().sort_values()
print("Average Base Happiness by Type:\n", avg_happiness_by_type)
# Bar plot for Average Base Happiness by Type
plt.figure(figsize=(12, 6))
sns.barplot(x=avg_happiness_by_type.index, y=avg_happiness_by_type.values)
plt.title('Average Base Happiness by Pokémon Type')
plt.xlabel('Type')
plt.ylabel('Average Base Happiness')
plt.xticks(rotation=90)
plt.show()
# Perform ANOVA to check for statistical significance
type_groups = [group['base_happiness'].values for name, group in data.groupby('type1')]
f_stat, p_value = f_oneway(*type_groups)
print("F-statistic:", f_stat)
print("P-value:", p_value)
if p_value < 0.05:
print("There is a significant difference in base happiness among different Pokémon types.")
else:
print("There is no significant difference in base happiness among different Pokémon types.")
Average Base Happiness by Type: type1 dark 42.241379 dragon 42.962963 steel 50.833333 ghost 58.333333 psychic 63.584906 rock 64.555556 ice 65.434783 bug 67.083333 water 67.543860 grass 67.692308 ground 68.437500 fire 68.461538 electric 68.461538 normal 69.571429 fighting 70.000000 poison 70.937500 flying 76.666667 fairy 77.777778 Name: base_happiness, dtype: float64
F-statistic: 7.8233967908132405 P-value: 2.6609058879442524e-18 There is a significant difference in base happiness among different Pokémon types.
from scipy.stats import pearsonr
# Load your dataset into a DataFrame (assuming data is your DataFrame)
# data = pd.read_csv('path_to_pokemon_dataset.csv')
# Compare Experience Growth with Generation
avg_exp_growth_by_generation = data.groupby('generation')['experience_growth'].mean().sort_values()
print("Average Experience Growth by Generation:\n", avg_exp_growth_by_generation)
# Bar plot for Average Experience Growth by Generation
plt.figure(figsize=(12, 6))
sns.barplot(x=avg_exp_growth_by_generation.index, y=avg_exp_growth_by_generation.values)
plt.title('Average Experience Growth by Generation')
plt.xlabel('Generation')
plt.ylabel('Average Experience Growth')
plt.show()
# Compare Experience Growth with Legendary Status
avg_exp_growth_legendary = data[data['is_legendary'] == True]['experience_growth'].mean()
avg_exp_growth_non_legendary = data[data['is_legendary'] == False]['experience_growth'].mean()
print("Average Experience Growth for Legendary Pokémon:", avg_exp_growth_legendary)
print("Average Experience Growth for Non-Legendary Pokémon:", avg_exp_growth_non_legendary)
# Bar plot for Average Experience Growth by Legendary Status
plt.figure(figsize=(8, 6))
sns.barplot(x=['Legendary', 'Non-Legendary'], y=[avg_exp_growth_legendary, avg_exp_growth_non_legendary])
plt.title('Average Experience Growth: Legendary vs Non-Legendary Pokémon')
plt.xlabel('Category')
plt.ylabel('Average Experience Growth')
plt.show()
# Compare Experience Growth with Base Total
plt.figure(figsize=(12, 6))
sns.scatterplot(x='base_total', y='experience_growth', data=data)
plt.title('Experience Growth vs Base Total')
plt.xlabel('Base Total')
plt.ylabel('Experience Growth')
plt.show()
# Calculate and display the correlation coefficient
correlation_exp_growth_base_total = pearsonr(data['base_total'], data['experience_growth'])
print("Correlation between Base Total and Experience Growth:\n", correlation_exp_growth_base_total)
Average Experience Growth by Generation: generation 2 1.025866e+06 6 1.050528e+06 1 1.052281e+06 4 1.055664e+06 3 1.058856e+06 5 1.061557e+06 7 1.080352e+06 Name: experience_growth, dtype: float64
Average Experience Growth for Legendary Pokémon: 1241851.142857143 Average Experience Growth for Non-Legendary Pokémon: 1037102.7906976744
Correlation between Base Total and Experience Growth: PearsonRResult(statistic=0.2569300707459611, pvalue=1.5293506350014974e-13)
avg_exp_growth_by_type = data.groupby('type1')['experience_growth'].mean().sort_values()
print("Average Experience Growth by Type:\n", avg_exp_growth_by_type)
# Bar plot for Average Experience Growth by Type
plt.figure(figsize=(12, 6))
sns.barplot(x=avg_exp_growth_by_type.index, y=avg_exp_growth_by_type.values)
plt.title('Average Experience Growth by Pokémon Type')
plt.xlabel('Type')
plt.ylabel('Average Experience Growth')
plt.xticks(rotation=90)
plt.show()
Average Experience Growth by Type: type1 fairy 9.138889e+05 rock 9.793116e+05 bug 1.009006e+06 normal 1.009973e+06 ghost 1.018117e+06 water 1.056716e+06 fire 1.064735e+06 ground 1.069652e+06 electric 1.073312e+06 fighting 1.076021e+06 psychic 1.079405e+06 grass 1.079791e+06 flying 1.083333e+06 ice 1.086069e+06 poison 1.086849e+06 dark 1.102720e+06 steel 1.126232e+06 dragon 1.216667e+06 Name: experience_growth, dtype: float64
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report
# Load your dataset into a DataFrame (assuming data is your DataFrame)
# data = pd.read_csv('path_to_pokemon_dataset.csv')
# Select relevant features
features = ['hp', 'attack', 'defense', 'sp_attack', 'sp_defense', 'speed', 'generation', 'base_total']
X = data[features]
y = data['is_legendary']
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# Initialize the XGBoost classifier
xgb_classifier = xgb.XGBClassifier(objective='binary:logistic', random_state=42)
# Train the classifier
xgb_classifier.fit(X_train, y_train)
# Predict on the test set
y_pred = xgb_classifier.predict(X_test)
# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
print("Model Performance:")
print(f"Accuracy: {accuracy:.2f}")
print(f"Precision: {precision:.2f}")
print(f"Recall: {recall:.2f}")
print(f"F1 Score: {f1:.2f}")
# Classification report
print("\nClassification Report:")
print(classification_report(y_test, y_pred))
Model Performance:
Accuracy: 0.96
Precision: 0.82
Recall: 0.78
F1 Score: 0.80
Classification Report:
precision recall f1-score support
0 0.97 0.98 0.98 143
1 0.82 0.78 0.80 18
accuracy 0.96 161
macro avg 0.90 0.88 0.89 161
weighted avg 0.96 0.96 0.96 161